In [3]:
import warnings
warnings.filterwarnings("ignore") # specify to ignore warning messages
%matplotlib inline
from sklearn.metrics import mean_squared_error
import math
from math import sqrt
import pandas as pd
from pandas import Series
from pandas import DataFrame
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot
import datetime
from dateutil.relativedelta import relativedelta
import seaborn as sns
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf 
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults
df = pd.read_csv('C:\\Python Library\\RMG Data\\RMG New Data\\CPU Utilization every 10 minutes from 1stFeb to 31st March.csv')
df_new = pd.read_csv('C:\\Python Library\\RMG Data\\RMG New Data\\New Data - CPU.csv')
df = df.append(df_new, ignore_index = True)
Out[3]:
6408
In [4]:
df['timestamp'] = df[['TheDate','TIME_OF_DAY']].apply(lambda x: ' '.join(x), axis=1)
df.head()
Out[4]:
CPU_IDLE CPU_SYS CPU_USR CPU_WIO TIME_OF_DAY TheDate Total CPU Weekday timestamp
0 30.15 8.78 52.31 8.76 0:00:00 2/1/2018 61.09 Thursday 2/1/2018 0:00:00
1 19.47 9.79 39.78 30.96 0:10:00 2/1/2018 49.57 Thursday 2/1/2018 0:10:00
2 33.79 9.24 41.22 15.75 0:20:00 2/1/2018 50.46 Thursday 2/1/2018 0:20:00
3 41.23 5.94 45.67 7.16 0:30:00 2/1/2018 51.61 Thursday 2/1/2018 0:30:00
4 24.14 8.45 50.62 16.79 0:40:00 2/1/2018 59.07 Thursday 2/1/2018 0:40:00
In [3]:
df['timestamp'] = pd.to_datetime(df['timestamp'], format = '%m/%d/%Y %H:%M:%S')
df = df.drop_duplicates(subset=['timestamp'], keep = 'first')
df.reset_index(inplace=True)
In [4]:
df['Hour'] = df.timestamp.apply(lambda x: x.hour)
df['Hour'] = df['Hour'].apply(lambda x: '{0:0>2}'.format(x))
df['Min'] = df.timestamp.apply(lambda x: x.minute)
df['Min'] = df['Min'].apply(lambda x: '{0:0>2}'.format(x))
df['Month'] = df.timestamp.apply(lambda x: x.month)
df['Month'] = df['Month'].apply(lambda x: '{0:0>2}'.format(x))
df['Day'] = df.timestamp.apply(lambda x: x.day)
df['Day'] = df['Day'].apply(lambda x: '{0:0>2}'.format(x))
df['Year'] = df.timestamp.apply(lambda x: x.year)
df['Weekday'] = df['timestamp'].dt.weekday_name
df['Index'] = df['Year'].astype(str)+'-'+df['Month']+'-'+df['Day']+' '+df['Hour']+' '+df['Min']
df.head()
Out[4]:
index CPU_IDLE CPU_SYS CPU_USR CPU_WIO TIME_OF_DAY TheDate Total CPU Weekday timestamp Hour Min Month Day Year Index
0 0 30.15 8.78 52.31 8.76 0:00:00 2/1/2018 61.09 Thursday 2018-02-01 00:00:00 00 00 02 01 2018 2018-02-01 00 00
1 1 19.47 9.79 39.78 30.96 0:10:00 2/1/2018 49.57 Thursday 2018-02-01 00:10:00 00 10 02 01 2018 2018-02-01 00 10
2 2 33.79 9.24 41.22 15.75 0:20:00 2/1/2018 50.46 Thursday 2018-02-01 00:20:00 00 20 02 01 2018 2018-02-01 00 20
3 3 41.23 5.94 45.67 7.16 0:30:00 2/1/2018 51.61 Thursday 2018-02-01 00:30:00 00 30 02 01 2018 2018-02-01 00 30
4 4 24.14 8.45 50.62 16.79 0:40:00 2/1/2018 59.07 Thursday 2018-02-01 00:40:00 00 40 02 01 2018 2018-02-01 00 40
In [5]:
df['CPU_Busy'] = df['CPU_USR'] + df['CPU_SYS']
df1 = df[['Index','CPU_Busy']]
df1.head()
Out[5]:
Index CPU_Busy
0 2018-02-01 00 00 61.09
1 2018-02-01 00 10 49.57
2 2018-02-01 00 20 50.46
3 2018-02-01 00 30 51.61
4 2018-02-01 00 40 59.07
In [6]:
df1['Index'] = pd.to_datetime(df1['Index'], format = '%Y-%m-%d %H %M')
df1['Weekday'] = df1['Index'].dt.weekday_name
df1.head()
Out[6]:
Index CPU_Busy Weekday
0 2018-02-01 00:00:00 61.09 Thursday
1 2018-02-01 00:10:00 49.57 Thursday
2 2018-02-01 00:20:00 50.46 Thursday
3 2018-02-01 00:30:00 51.61 Thursday
4 2018-02-01 00:40:00 59.07 Thursday
In [7]:
df1 = df1.set_index('Index')
df1 = df1.resample('10Min').interpolate(method='linear')
df1.reset_index(inplace=True)
df1['Weekday'] = df1['Index'].dt.weekday_name
print(df1.count())
Index       7488
CPU_Busy    7488
Weekday     7488
dtype: int64

Prediction for Monday's

In [8]:
warnings.filterwarnings("ignore") # specify to ignore warning messages
df_mon = df1[df1['Weekday'] == 'Monday']
del df_mon['Weekday']
del df_mon['Index']
df_mon.reset_index(inplace=True)
del df_mon['index']
length = len(df_mon.index)-1
df_mon.drop(df_mon.index[length], inplace=True)
start = datetime.datetime.strptime("00:00:00",'%H:%M:%S')
time_list = [start + relativedelta(minutes=x*10) for x in range(0,length)]
df_mon['index'] = time_list
df_mon.set_index(['index'], inplace=True)
df_mon.index.name=None
In [10]:
#Summary Statistics of series
print(df_mon.describe())
          CPU_Busy
count  1007.000000
mean     46.087895
std      12.735145
min       7.920000
25%      37.670000
50%      45.780000
75%      53.710000
max      92.070000
In [11]:
#Time series plot
ax = df_mon.CPU_Busy.plot(figsize=(10,4), title= 'Hourly CPU Utilization Time Series Plot', fontsize=8)
ax.set(xlabel="Days", ylabel="CPU Utilization Percentage")
Out[11]:
[<matplotlib.text.Text at 0x1f2c364d1d0>,
 <matplotlib.text.Text at 0x1f2c35f9a90>]
In [12]:
decomposition = seasonal_decompose(df_mon.values, freq=72)
fig = plt.figure() 
fig = decomposition.plot()
fig.set_size_inches(12, 5)
<matplotlib.figure.Figure at 0x1f2c56bca90>
In [13]:
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_mon.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -4.503869
p-value: 0.000193
Critical Values:
	1%: -3.437
	5%: -2.864
	10%: -2.568
In [14]:
fig = plt.figure(figsize=(8,4))
pyplot.figure(figsize = (8,4))
pyplot.subplot(211)
plot_acf(df_mon.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_mon.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1f2c57a6d30>
In [15]:
mod = sm.tsa.statespace.SARIMAX(df_mon['CPU_Busy'],
                                order=(8, 0, 0),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_mon = mod.fit()

print(results_mon.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1007
Model:             SARIMAX(8, 0, 0)x(1, 1, 1, 72)   Log Likelihood               -3281.909
Date:                            Thu, 19 Apr 2018   AIC                           6585.818
Time:                                    19:15:32   BIC                           6639.880
Sample:                                01-01-1900   HQIC                          6606.358
                                     - 01-07-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.1649      0.034      4.879      0.000       0.099       0.231
ar.L2          0.0781      0.036      2.166      0.030       0.007       0.149
ar.L3          0.1938      0.034      5.704      0.000       0.127       0.260
ar.L4          0.1183      0.035      3.422      0.001       0.051       0.186
ar.L5          0.0403      0.035      1.160      0.246      -0.028       0.108
ar.L6          0.1191      0.034      3.471      0.001       0.052       0.186
ar.L7         -0.0116      0.035     -0.333      0.739      -0.080       0.057
ar.L8          0.0042      0.036      0.118      0.906      -0.066       0.074
ar.S.L72      -0.3765      0.036    -10.396      0.000      -0.447      -0.305
ma.S.L72      -0.6593      0.039    -16.921      0.000      -0.736      -0.583
sigma2       120.8594      5.594     21.603      0.000     109.894     131.824
===================================================================================
Ljung-Box (Q):                       69.70   Jarque-Bera (JB):                27.53
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               1.03   Skew:                             0.33
Prob(H) (two-sided):                  0.82   Kurtosis:                         3.57
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [16]:
results_mon.plot_diagnostics(figsize=(10, 7))
plt.show()
In [17]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_mon = results_mon.get_forecast(steps=73)
pred_ci_mon = pred_uc_mon.conf_int(alpha = 0.1)
pred_ci_mon = pred_ci_mon.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [18]:
ax = df_mon.CPU_Busy[-73:].plot(label='observed', figsize=(10,4))
pred_uc_mon.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_mon.index,
                pred_ci_mon.iloc[:, 0],
                pred_ci_mon.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('CPU Load % Mondays')

plt.legend()
plt.show()
In [19]:
pred_ci_mon['forecast'] = (pred_ci_mon['lower_CI'] + pred_ci_mon['upper_CI'])/2

pred_ci_mon.loc[pred_ci_mon['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_mon.loc[pred_ci_mon['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_mon.loc[pred_ci_mon['forecast'] > 100, 'forecast'] = 100
final_pred_mon = pred_ci_mon[1:]
final_pred_mon.head(10)
final_pred_mon.to_csv('C:\\Python Library\\RMG Data\\forcast mon.csv')
In [20]:
fig = plt.figure(figsize=(12,4))
sns.tsplot([final_pred_mon.upper_CI[1:], final_pred_mon.forecast[1:],
            final_pred_mon.lower_CI[1:]],ci = [0,100], color="indianred")
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x1f28129b198>
In [21]:
CPU_pred = 15
start_time = str('02:00:00')
end_time = str('05:00:00')

mod = df_mon.between_time(start_time,end_time) + CPU_pred
df_mon_new = pd.merge(df_mon, mod, left_index=True, right_index=True, how='outer')
df_mon_new = df_mon_new.fillna(0)
df_mon_new['CPU_Busy'] = df_mon_new[['CPU_Busy_x', 'CPU_Busy_y']].max(axis=1)
del df_mon_new['CPU_Busy_x']
del df_mon_new['CPU_Busy_y']
df_mon_new.head()
Out[21]:
CPU_Busy
1900-01-01 00:00:00 34.37
1900-01-01 00:10:00 45.37
1900-01-01 00:20:00 42.44
1900-01-01 00:30:00 50.75
1900-01-01 00:40:00 31.15
In [22]:
## Augmented Dickey Fuller Test ##
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_mon_new.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -4.851171
p-value: 0.000043
Critical Values:
	1%: -3.437
	5%: -2.864
	10%: -2.568
In [23]:
## Plotting ACF and PACF
fig = plt.figure(figsize=(10,4))
pyplot.figure(figsize = (10,4))
pyplot.subplot(211)
plot_acf(df_mon_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_mon_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1f280eb4c88>
In [24]:
mod = sm.tsa.statespace.SARIMAX(df_mon_new['CPU_Busy'],
                                order=(8, 0, 0),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_mon_new = mod.fit()

print(results_mon_new.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1007
Model:             SARIMAX(8, 0, 0)x(1, 1, 1, 72)   Log Likelihood               -3293.618
Date:                            Thu, 19 Apr 2018   AIC                           6609.236
Time:                                    19:20:43   BIC                           6663.298
Sample:                                01-01-1900   HQIC                          6629.777
                                     - 01-07-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.1820      0.034      5.388      0.000       0.116       0.248
ar.L2          0.0802      0.036      2.238      0.025       0.010       0.150
ar.L3          0.2023      0.034      5.918      0.000       0.135       0.269
ar.L4          0.1145      0.034      3.373      0.001       0.048       0.181
ar.L5          0.0338      0.035      0.974      0.330      -0.034       0.102
ar.L6          0.1206      0.034      3.537      0.000       0.054       0.187
ar.L7         -0.0114      0.035     -0.328      0.743      -0.079       0.057
ar.L8          0.0121      0.036      0.335      0.738      -0.059       0.083
ar.S.L72      -0.4282      0.035    -12.126      0.000      -0.497      -0.359
ma.S.L72      -0.6271      0.039    -16.061      0.000      -0.704      -0.551
sigma2       124.9141      5.768     21.656      0.000     113.609     136.219
===================================================================================
Ljung-Box (Q):                       68.45   Jarque-Bera (JB):                23.70
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               1.04   Skew:                             0.30
Prob(H) (two-sided):                  0.72   Kurtosis:                         3.55
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [25]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_mon_new = results_mon_new.get_forecast(steps=73)
pred_ci_mon_new = pred_uc_mon_new.conf_int(alpha = 0.1)
pred_ci_mon_new = pred_ci_mon_new.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [26]:
ax = df_mon_new.CPU_Busy[-73:].plot(label='observed', figsize=(10,5))
pred_uc_mon_new.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_mon_new.index,
                pred_ci_mon_new.iloc[:, 0],
                pred_ci_mon_new.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('New CPU Load % Mondays')

plt.legend()
plt.show()
In [27]:
pred_ci_mon_new['forecast'] = (pred_ci_mon_new['lower_CI'] + pred_ci_mon_new['upper_CI'])/2

pred_ci_mon_new.loc[pred_ci_mon_new['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_mon_new.loc[pred_ci_mon_new['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_mon_new.loc[pred_ci_mon_new['forecast'] > 100, 'forecast'] = 100
final_pred_mon_new = pred_ci_mon_new[1:]
final_pred_mon_new.head()
Out[27]:
lower_CI upper_CI forecast
1900-01-08 00:00:00 33.971473 71.342768 52.657121
1900-01-08 00:10:00 25.802886 63.405579 44.604232
1900-01-08 00:20:00 37.046000 75.649718 56.347859
1900-01-08 00:30:00 42.629543 81.952275 62.290909
1900-01-08 00:40:00 31.648498 71.277172 51.462835
In [28]:
fig = plt.figure(figsize = (12,4))
ax  = fig.add_subplot(111)
ax.plot(final_pred_mon.index, final_pred_mon['forecast'], c='b', label='Base Forecast',linewidth = 3.0)
ax.plot(final_pred_mon_new.index, final_pred_mon_new['forecast'], c='r', label='Expected Shift',linewidth = 2.0)
#ax.plot([0,len(final_pred_mon.index)],[80,80], linewidth=3)

leg = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# get the lines and texts inside legend box
leg_lines = leg.get_lines()
leg_texts = leg.get_texts()
# bulk-set the properties of all lines and texts
plt.setp(leg_lines, linewidth=3)
plt.setp(leg_texts, fontsize='medium')
plt.title('Impact Analysis of New Job', fontsize = 'medium')
plt.show()
#fig.savefig('C:/Users/manoj.e.kumar.sharma/Desktop/Graphs/Impact Graphs/Impact Analysis-Mon.png')

Prediction for Tuesday's

In [29]:
warnings.filterwarnings("ignore") # specify to ignore warning messages
df_tue = df1[df1['Weekday'] == 'Tuesday']
del df_tue['Weekday']
del df_tue['Index']
df_tue.reset_index(inplace=True)
del df_tue['index']
length = len(df_tue.index)-1
df_tue.drop(df_tue.index[length], inplace=True)
start = datetime.datetime.strptime("00:00:00",'%H:%M:%S')
time_list = [start + relativedelta(minutes=x*10) for x in range(0,length)]
df_tue['index'] = time_list
df_tue.set_index(['index'], inplace=True)
df_tue.index.name=None
In [31]:
#Summary Statistics of series
print(df_tue.describe())
          CPU_Busy
count  1007.000000
mean     43.498928
std      12.848357
min      11.660000
25%      34.130000
50%      42.290000
75%      51.855000
max      92.680000
In [32]:
#Time series plot
ax = df_tue.CPU_Busy.plot(figsize=(10,4), title= 'Hourly CPU Utilization Time Series Plot', fontsize=8)
ax.set(xlabel="Days", ylabel="CPU Utilization Percentage")
Out[32]:
[<matplotlib.text.Text at 0x1f2c5a029e8>,
 <matplotlib.text.Text at 0x1f280e499b0>]
In [33]:
decomposition = seasonal_decompose(df_tue.values, freq=72)
fig = plt.figure() 
fig = decomposition.plot()
fig.set_size_inches(12, 5)
<matplotlib.figure.Figure at 0x1f2c5a15b00>
In [34]:
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_tue.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -4.136988
p-value: 0.000841
Critical Values:
	1%: -3.437
	5%: -2.864
	10%: -2.568
In [35]:
fig = plt.figure(figsize=(8,4))
pyplot.figure(figsize = (8,4))
pyplot.subplot(211)
plot_acf(df_tue.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_tue.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1f2c5a155c0>
In [36]:
mod = sm.tsa.statespace.SARIMAX(df_tue['CPU_Busy'],
                                order=(6, 1, 2),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_tue = mod.fit()

print(results_tue.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1007
Model:             SARIMAX(6, 1, 2)x(1, 1, 1, 72)   Log Likelihood               -3281.556
Date:                            Thu, 19 Apr 2018   AIC                           6585.112
Time:                                    19:32:14   BIC                           6639.174
Sample:                                01-01-1900   HQIC                          6605.652
                                     - 01-07-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.3395      0.361     -0.940      0.347      -1.047       0.368
ar.L2         -0.0057      0.043     -0.130      0.896      -0.091       0.079
ar.L3          0.0751      0.043      1.752      0.080      -0.009       0.159
ar.L4          0.1327      0.051      2.592      0.010       0.032       0.233
ar.L5          0.0948      0.052      1.821      0.069      -0.007       0.197
ar.L6          0.1020      0.037      2.753      0.006       0.029       0.175
ma.L1         -0.5937      0.363     -1.636      0.102      -1.305       0.118
ma.L2         -0.3335      0.338     -0.986      0.324      -0.997       0.330
ar.S.L72      -0.3513      0.038     -9.261      0.000      -0.426      -0.277
ma.S.L72      -0.7512      0.042    -18.076      0.000      -0.833      -0.670
sigma2       117.0265      5.639     20.752      0.000     105.973     128.080
===================================================================================
Ljung-Box (Q):                       70.94   Jarque-Bera (JB):                 8.30
Prob(Q):                              0.00   Prob(JB):                         0.02
Heteroskedasticity (H):               0.96   Skew:                             0.21
Prob(H) (two-sided):                  0.71   Kurtosis:                         3.23
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [37]:
results_tue.plot_diagnostics(figsize=(10, 7))
plt.show()
In [38]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_tue = results_tue.get_forecast(steps=73)
pred_ci_tue = pred_uc_tue.conf_int(alpha = 0.1)
pred_ci_tue = pred_ci_tue.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [39]:
ax = df_tue.CPU_Busy[-73:].plot(label='observed', figsize=(10,4))
pred_uc_tue.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_tue.index,
                pred_ci_tue.iloc[:, 0],
                pred_ci_tue.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('CPU Load % Tuesdays')

plt.legend()
plt.show()
In [40]:
pred_ci_tue['forecast'] = (pred_ci_tue['lower_CI'] + pred_ci_tue['upper_CI'])/2

pred_ci_tue.loc[pred_ci_tue['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_tue.loc[pred_ci_tue['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_tue.loc[pred_ci_tue['forecast'] > 100, 'forecast'] = 100
final_pred_tue = pred_ci_tue[1:]
final_pred_tue.head(10)
final_pred_tue.to_csv('C:\\Python Library\\RMG Data\\forcast tue.csv')
In [41]:
fig = plt.figure(figsize=(12,4))
sns.tsplot([final_pred_tue.upper_CI[1:], final_pred_tue.forecast[1:],
            final_pred_tue.lower_CI[1:]],ci = [0,100], color="indianred")
Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x1f2810463c8>
In [42]:
CPU_pred = 15
start_time = str('02:00:00')
end_time = str('05:00:00')

mod = df_tue.between_time(start_time,end_time) + CPU_pred
df_tue_new = pd.merge(df_tue, mod, left_index=True, right_index=True, how='outer')
df_tue_new = df_tue_new.fillna(0)
df_tue_new['CPU_Busy'] = df_tue_new[['CPU_Busy_x', 'CPU_Busy_y']].max(axis=1)
del df_tue_new['CPU_Busy_x']
del df_tue_new['CPU_Busy_y']
df_tue_new.head()
Out[42]:
CPU_Busy
1900-01-01 00:00:00 36.37
1900-01-01 00:10:00 41.56
1900-01-01 00:20:00 33.07
1900-01-01 00:30:00 26.66
1900-01-01 00:40:00 43.31
In [43]:
                                        ## Augmented Dickey Fuller Test ##
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_tue_new.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -4.369571
p-value: 0.000336
Critical Values:
	1%: -3.437
	5%: -2.864
	10%: -2.568
In [44]:
                            ##  Plotting ACF and PACF  ##
fig = plt.figure(figsize=(10,4))
pyplot.figure(figsize = (10,4))
pyplot.subplot(211)
plot_acf(df_tue_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_tue_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1f280fca780>
In [45]:
mod = sm.tsa.statespace.SARIMAX(df_tue_new['CPU_Busy'],
                                order=(6, 1, 2),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_tue_new = mod.fit()

print(results_tue_new.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1007
Model:             SARIMAX(6, 1, 2)x(1, 1, 1, 72)   Log Likelihood               -3292.016
Date:                            Thu, 19 Apr 2018   AIC                           6606.032
Time:                                    19:41:53   BIC                           6660.094
Sample:                                01-01-1900   HQIC                          6626.572
                                     - 01-07-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.3730      0.412     -0.906      0.365      -1.180       0.434
ar.L2         -0.0103      0.045     -0.229      0.819      -0.099       0.078
ar.L3          0.0619      0.045      1.363      0.173      -0.027       0.151
ar.L4          0.1172      0.052      2.251      0.024       0.015       0.219
ar.L5          0.0791      0.053      1.484      0.138      -0.025       0.184
ar.L6          0.0859      0.038      2.272      0.023       0.012       0.160
ma.L1         -0.6772      0.404     -1.675      0.094      -1.470       0.115
ma.L2         -0.4127      0.437     -0.944      0.345      -1.269       0.444
ar.S.L72      -0.3843      0.037    -10.416      0.000      -0.457      -0.312
ma.S.L72      -0.7386      0.041    -18.081      0.000      -0.819      -0.658
sigma2       106.1422      6.928     15.321      0.000      92.564     119.720
===================================================================================
Ljung-Box (Q):                       69.65   Jarque-Bera (JB):                 8.37
Prob(Q):                              0.00   Prob(JB):                         0.02
Heteroskedasticity (H):               0.95   Skew:                             0.21
Prob(H) (two-sided):                  0.68   Kurtosis:                         3.26
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [46]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_tue_new = results_tue_new.get_forecast(steps=73)
pred_ci_tue_new = pred_uc_tue_new.conf_int(alpha = 0.1)
pred_ci_tue_new = pred_ci_tue_new.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [47]:
ax = df_tue_new.CPU_Busy[-73:].plot(label='observed', figsize=(10,6))
pred_uc_tue_new.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_tue_new.index,
                pred_ci_tue_new.iloc[:, 0],
                pred_ci_tue_new.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('New CPU Load % Tuesday')

plt.legend()
plt.show()
In [48]:
pred_ci_tue_new['forecast'] = (pred_ci_tue_new['lower_CI'] + pred_ci_tue_new['upper_CI'])/2

pred_ci_tue_new.loc[pred_ci_tue_new['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_tue_new.loc[pred_ci_tue_new['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_tue_new.loc[pred_ci_tue_new['forecast'] > 100, 'forecast'] = 100
final_pred_tue_new = pred_ci_tue_new[1:]
final_pred_tue_new.head()
Out[48]:
lower_CI upper_CI forecast
1900-01-08 00:00:00 33.411402 69.605357 51.508379
1900-01-08 00:10:00 23.358732 59.590670 41.474701
1900-01-08 00:20:00 34.002273 70.529359 52.265816
1900-01-08 00:30:00 25.752593 62.721850 44.237221
1900-01-08 00:40:00 25.202019 62.403364 43.802691
In [49]:
fig = plt.figure(figsize = (12,4))
ax  = fig.add_subplot(111)
ax.plot(final_pred_tue.index, final_pred_tue['forecast'], c='b', label='Base Forecast',linewidth = 3.0)
ax.plot(final_pred_tue_new.index, final_pred_tue_new['forecast'], c='r', label='Expected Shift',linewidth = 2.0)
#ax.plot([0,len(final_pred_tue.index)],[80,80], linewidth=3)

leg = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# get the lines and texts inside legend box
leg_lines = leg.get_lines()
leg_texts = leg.get_texts()
# bulk-set the properties of all lines and texts
plt.setp(leg_lines, linewidth=3)
plt.setp(leg_texts, fontsize='medium')
plt.title('Impact Analysis of New Job', fontsize = 'medium')
plt.show()
#fig.savefig('C:/Users/manoj.e.kumar.sharma/Desktop/Graphs/Impact Graphs/Impact Analysis-Tue.png')

Prediction for Wednesday's

In [50]:
warnings.filterwarnings("ignore") # specify to ignore warning messages
df_wed = df1[df1['Weekday'] == 'Wednesday']
del df_wed['Weekday']
del df_wed['Index']
df_wed.reset_index(inplace=True)
del df_wed['index']
length = len(df_wed.index)-1
df_wed.drop(df_wed.index[length], inplace=True)
start = datetime.datetime.strptime("00:00:00",'%H:%M:%S')
time_list = [start + relativedelta(minutes=x*10) for x in range(0,length)]
df_wed['index'] = time_list
df_wed.set_index(['index'], inplace=True)
df_wed.index.name=None
In [52]:
#Summary Statistics of series
print(df_wed.describe())
          CPU_Busy
count  1007.000000
mean     42.681184
std      13.957146
min      12.220000
25%      31.497128
50%      40.850000
75%      50.735000
max      94.670000
In [53]:
#Time series plot
ax = df_wed.CPU_Busy.plot(figsize=(10,4), title= 'Hourly CPU Utilization Time Series Plot', fontsize=8)
ax.set(xlabel="Days", ylabel="CPU Utilization Percentage")
Out[53]:
[<matplotlib.text.Text at 0x1f28125d6d8>,
 <matplotlib.text.Text at 0x1f2812a9f98>]
In [54]:
decomposition = seasonal_decompose(df_wed.values, freq=72)
fig = plt.figure() 
fig = decomposition.plot()
fig.set_size_inches(12, 5)
<matplotlib.figure.Figure at 0x1f281278390>
In [55]:
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_wed.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -4.812085
p-value: 0.000052
Critical Values:
	1%: -3.437
	5%: -2.864
	10%: -2.568
In [56]:
fig = plt.figure(figsize=(8,4))
pyplot.figure(figsize = (8,4))
pyplot.subplot(211)
plot_acf(df_wed.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_wed.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1f281284c18>
In [57]:
mod = sm.tsa.statespace.SARIMAX(df_wed['CPU_Busy'],
                                order=(4, 1, 3),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_wed = mod.fit()

print(results_wed.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1007
Model:             SARIMAX(4, 1, 3)x(1, 1, 1, 72)   Log Likelihood               -3370.649
Date:                            Thu, 19 Apr 2018   AIC                           6761.299
Time:                                    19:53:23   BIC                           6810.446
Sample:                                01-01-1900   HQIC                          6779.972
                                     - 01-07-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.3913      2.414     -0.162      0.871      -5.123       4.340
ar.L2          0.0740      0.967      0.077      0.939      -1.822       1.970
ar.L3          0.0691      0.194      0.356      0.722      -0.312       0.450
ar.L4          0.0649      0.245      0.265      0.791      -0.415       0.545
ma.L1         -0.3176      2.416     -0.131      0.895      -5.052       4.417
ma.L2         -0.5518      2.611     -0.211      0.833      -5.670       4.567
ma.L3         -0.0162      0.498     -0.033      0.974      -0.993       0.960
ar.S.L72      -0.2253      0.041     -5.462      0.000      -0.306      -0.144
ma.S.L72      -0.7617      0.041    -18.646      0.000      -0.842      -0.682
sigma2       140.9535      6.460     21.819      0.000     128.292     153.615
===================================================================================
Ljung-Box (Q):                       41.27   Jarque-Bera (JB):                38.11
Prob(Q):                              0.41   Prob(JB):                         0.00
Heteroskedasticity (H):               0.90   Skew:                             0.43
Prob(H) (two-sided):                  0.37   Kurtosis:                         3.57
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [58]:
results_wed.plot_diagnostics(figsize=(10, 7))
plt.show()
In [59]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_wed = results_wed.get_forecast(steps=73)
pred_ci_wed = pred_uc_wed.conf_int(alpha = 0.1)
pred_ci_wed = pred_ci_wed.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [60]:
ax = df_wed.CPU_Busy[-73:].plot(label='observed', figsize=(10,4))
pred_uc_wed.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_wed.index,
                pred_ci_wed.iloc[:, 0],
                pred_ci_wed.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('CPU Load % Wednesday')

plt.legend()
plt.show()
In [61]:
pred_ci_wed['forecast'] = (pred_ci_wed['lower_CI'] + pred_ci_wed['upper_CI'])/2

pred_ci_wed.loc[pred_ci_wed['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_wed.loc[pred_ci_wed['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_wed.loc[pred_ci_wed['forecast'] > 100, 'forecast'] = 100
final_pred_wed = pred_ci_wed[1:]
final_pred_wed.head(10)
final_pred_wed.to_csv('C:\\Python Library\\RMG Data\\forcast wed.csv')
In [62]:
fig = plt.figure(figsize=(12,4))
sns.tsplot([final_pred_wed.upper_CI[1:], final_pred_wed.forecast[1:],
            final_pred_wed.lower_CI[1:]],ci = [0,100], color="indianred")
Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x1f28fdb50f0>
In [63]:
CPU_pred = 15
start_time = str('02:00:00')
end_time = str('05:00:00')

mod = df_wed.between_time(start_time,end_time) + CPU_pred
df_wed_new = pd.merge(df_wed, mod, left_index=True, right_index=True, how='outer')
df_wed_new = df_wed_new.fillna(0)
df_wed_new['CPU_Busy'] = df_wed_new[['CPU_Busy_x', 'CPU_Busy_y']].max(axis=1)
del df_wed_new['CPU_Busy_x']
del df_wed_new['CPU_Busy_y']
df_wed_new.head()
Out[63]:
CPU_Busy
1900-01-01 00:00:00 32.053114
1900-01-01 00:10:00 31.996228
1900-01-01 00:20:00 31.939343
1900-01-01 00:30:00 31.882457
1900-01-01 00:40:00 31.825571
In [64]:
                                        ## Augmented Dickey Fuller Test ##
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_wed_new.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -5.105675
p-value: 0.000014
Critical Values:
	1%: -3.437
	5%: -2.864
	10%: -2.568
In [65]:
                            ##  Plotting ACF and PACF  ##
fig = plt.figure(figsize=(10,4))
pyplot.figure(figsize = (10,4))
pyplot.subplot(211)
plot_acf(df_wed_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_wed_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1f2905dca90>
In [66]:
mod = sm.tsa.statespace.SARIMAX(df_wed_new['CPU_Busy'],
                                order=(4, 1, 3),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_wed_new = mod.fit()

print(results_wed_new.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1007
Model:             SARIMAX(4, 1, 3)x(1, 1, 1, 72)   Log Likelihood               -3379.224
Date:                            Thu, 19 Apr 2018   AIC                           6778.448
Time:                                    20:01:14   BIC                           6827.596
Sample:                                01-01-1900   HQIC                          6797.121
                                     - 01-07-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.0186      4.975     -0.004      0.997      -9.770       9.732
ar.L2         -0.0024      1.762     -0.001      0.999      -3.456       3.451
ar.L3          0.1011      0.325      0.312      0.755      -0.535       0.737
ar.L4          0.0375      0.553      0.068      0.946      -1.047       1.122
ma.L1         -0.6740      4.976     -0.135      0.892     -10.427       9.079
ma.L2         -0.2107      5.186     -0.041      0.968     -10.374       9.953
ma.L3         -0.0220      0.610     -0.036      0.971      -1.219       1.174
ar.S.L72      -0.2555      0.041     -6.200      0.000      -0.336      -0.175
ma.S.L72      -0.7475      0.040    -18.850      0.000      -0.825      -0.670
sigma2       144.3877      6.642     21.739      0.000     131.370     157.406
===================================================================================
Ljung-Box (Q):                       40.24   Jarque-Bera (JB):                28.15
Prob(Q):                              0.46   Prob(JB):                         0.00
Heteroskedasticity (H):               0.91   Skew:                             0.37
Prob(H) (two-sided):                  0.45   Kurtosis:                         3.48
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [67]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_wed_new = results_wed_new.get_forecast(steps=73)
pred_ci_wed_new = pred_uc_wed_new.conf_int(alpha = 0.1)
pred_ci_wed_new = pred_ci_wed_new.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [68]:
ax = df_wed_new.CPU_Busy[-73:].plot(label='observed', figsize=(10,6))
pred_uc_wed_new.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_wed_new.index,
                pred_ci_wed_new.iloc[:, 0],
                pred_ci_wed_new.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('New CPU Load % Wednesday')

plt.legend()
plt.show()
In [69]:
pred_ci_wed_new['forecast'] = (pred_ci_wed_new['lower_CI'] + pred_ci_wed_new['upper_CI'])/2

pred_ci_wed_new.loc[pred_ci_wed_new['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_wed_new.loc[pred_ci_wed_new['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_wed_new.loc[pred_ci_wed_new['forecast'] > 100, 'forecast'] = 100
final_pred_wed_new = pred_ci_wed_new[1:]
final_pred_wed_new.head()
Out[69]:
lower_CI upper_CI forecast
1900-01-08 00:00:00 37.424981 78.785315 58.105148
1900-01-08 00:10:00 22.650406 64.227387 43.438896
1900-01-08 00:20:00 21.685641 63.947503 42.816572
1900-01-08 00:30:00 26.918862 69.639736 48.279299
1900-01-08 00:40:00 24.419057 67.369713 45.894385
In [70]:
fig = plt.figure(figsize = (12,4))
ax  = fig.add_subplot(111)
ax.plot(final_pred_wed.index, final_pred_wed['forecast'], c='b', label='Base Forecast',linewidth = 3.0)
ax.plot(final_pred_wed_new.index, final_pred_wed_new['forecast'], c='r', label='Expected Shift',linewidth = 2.0)
#ax.plot([0,len(final_pred_wed.index)],[80,80], linewidth=3)

leg = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# get the lines and texts inside legend box
leg_lines = leg.get_lines()
leg_texts = leg.get_texts()
# bulk-set the properties of all lines and texts
plt.setp(leg_lines, linewidth=3)
plt.setp(leg_texts, fontsize='medium')
plt.title('Impact Analysis of New Job', fontsize = 'medium')
plt.show()
#fig.savefig('C:/Users/manoj.e.kumar.sharma/Desktop/Graphs/Impact Graphs/Impact Analysis-Wed.png')

Prediction for Thursday's

In [71]:
warnings.filterwarnings("ignore") # specify to ignore warning messages
df_thu = df1[df1['Weekday'] == 'Thursday']
del df_thu['Weekday']
del df_thu['Index']
df_thu.reset_index(inplace=True)
del df_thu['index']
length = len(df_thu.index)-1
df_thu.drop(df_thu.index[length], inplace=True)
start = datetime.datetime.strptime("00:00:00",'%H:%M:%S')
time_list = [start + relativedelta(minutes=x*10) for x in range(0,length)]
df_thu['index'] = time_list
df_thu.set_index(['index'], inplace=True)
df_thu.index.name=None
In [73]:
#Summary Statistics of series
print(df_thu.describe())
          CPU_Busy
count  1151.000000
mean     45.207932
std      14.533468
min      15.260000
25%      37.583207
50%      44.444345
75%      60.305862
max      95.190000
In [74]:
#Time series plot
ax = df_thu.CPU_Busy.plot(figsize=(10,4), title= 'Hourly CPU Utilization Time Series Plot', fontsize=8)
ax.set(xlabel="Days", ylabel="CPU Utilization Percentage")
Out[74]:
[<matplotlib.text.Text at 0x1f2810d9710>,
 <matplotlib.text.Text at 0x1f280f25cf8>]
In [75]:
decomposition = seasonal_decompose(df_thu.values, freq=72)
fig = plt.figure() 
fig = decomposition.plot()
fig.set_size_inches(12, 5)
<matplotlib.figure.Figure at 0x1f281046c88>
In [76]:
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_thu.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -1.566344
p-value: 0.500474
Critical Values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
In [77]:
fig = plt.figure(figsize=(8,4))
pyplot.figure(figsize = (8,4))
pyplot.subplot(211)
plot_acf(df_thu.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_thu.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1f291293f98>
In [78]:
mod = sm.tsa.statespace.SARIMAX(df_thu['CPU_Busy'],
                                order=(0, 2, 1),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_thu = mod.fit()

print(results_thu.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1151
Model:             SARIMAX(0, 2, 1)x(1, 1, 1, 72)   Log Likelihood               -2756.220
Date:                            Thu, 19 Apr 2018   AIC                           5520.440
Time:                                    20:04:48   BIC                           5540.633
Sample:                                01-01-1900   HQIC                          5528.062
                                     - 01-08-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ma.L1         -1.0000     12.589     -0.079      0.937     -25.673      23.673
ar.S.L72      -0.3273      0.006    -51.437      0.000      -0.340      -0.315
ma.S.L72       0.4740      0.009     51.173      0.000       0.456       0.492
sigma2        13.9437    175.550      0.079      0.937    -330.128     358.015
===================================================================================
Ljung-Box (Q):                      268.90   Jarque-Bera (JB):             60167.80
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               0.19   Skew:                            -0.83
Prob(H) (two-sided):                  0.00   Kurtosis:                        40.91
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [79]:
results_thu.plot_diagnostics(figsize=(10, 7))
plt.show()
In [80]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_thu = results_thu.get_forecast(steps=73)
pred_ci_thu = pred_uc_thu.conf_int(alpha = 0.1)
pred_ci_thu = pred_ci_thu.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [81]:
ax = df_thu.CPU_Busy[-73:].plot(label='observed', figsize=(10,4))
pred_uc_thu.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_thu.index,
                pred_ci_thu.iloc[:, 0],
                pred_ci_thu.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('CPU Load % Tuesdays')

plt.legend()
plt.show()
In [82]:
pred_ci_thu['forecast'] = (pred_ci_thu['lower_CI'] + pred_ci_thu['upper_CI'])/2

pred_ci_thu.loc[pred_ci_thu['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_thu.loc[pred_ci_thu['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_thu.loc[pred_ci_thu['forecast'] > 100, 'forecast'] = 100
final_pred_thu = pred_ci_thu[1:]
final_pred_thu.head(10)
final_pred_thu.to_csv('C:\\Python Library\\RMG Data\\forcast thu.csv')
In [83]:
fig = plt.figure(figsize=(12,4))
sns.tsplot([final_pred_thu.upper_CI[1:], final_pred_thu.forecast[1:],
            final_pred_thu.lower_CI[1:]],ci = [0,100], color="indianred")
Out[83]:
<matplotlib.axes._subplots.AxesSubplot at 0x1f293bc8630>
In [84]:
CPU_pred = 15
start_time = str('02:00:00')
end_time = str('05:00:00')

mod = df_thu.between_time(start_time,end_time) + CPU_pred
df_thu_new = pd.merge(df_thu, mod, left_index=True, right_index=True, how='outer')
df_thu_new = df_thu_new.fillna(0)
df_thu_new['CPU_Busy'] = df_thu_new[['CPU_Busy_x', 'CPU_Busy_y']].max(axis=1)
del df_thu_new['CPU_Busy_x']
del df_thu_new['CPU_Busy_y']
df_thu_new.head()
Out[84]:
CPU_Busy
1900-01-01 00:00:00 61.09
1900-01-01 00:10:00 49.57
1900-01-01 00:20:00 50.46
1900-01-01 00:30:00 51.61
1900-01-01 00:40:00 59.07
In [85]:
                                        ## Augmented Dickey Fuller Test ##
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_thu_new.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -2.209904
p-value: 0.202670
Critical Values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
In [86]:
                            ##  Plotting ACF and PACF  ##
fig = plt.figure(figsize=(10,4))
pyplot.figure(figsize = (10,4))
pyplot.subplot(211)
plot_acf(df_thu_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_thu_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1f293b78fd0>
In [87]:
mod = sm.tsa.statespace.SARIMAX(df_thu_new['CPU_Busy'],
                                order=(0, 2, 1),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_thu_new = mod.fit()

print(results_thu_new.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1151
Model:             SARIMAX(0, 2, 1)x(1, 1, 1, 72)   Log Likelihood               -2890.260
Date:                            Thu, 19 Apr 2018   AIC                           5788.521
Time:                                    20:06:51   BIC                           5808.715
Sample:                                01-01-1900   HQIC                          5796.143
                                     - 01-08-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ma.L1         -1.0000      7.309     -0.137      0.891     -15.325      13.325
ar.S.L72      -0.3414      0.008    -45.258      0.000      -0.356      -0.327
ma.S.L72       0.0880      0.013      6.656      0.000       0.062       0.114
sigma2        18.5254    135.411      0.137      0.891    -246.876     283.927
===================================================================================
Ljung-Box (Q):                      257.82   Jarque-Bera (JB):             18677.64
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               0.18   Skew:                             0.42
Prob(H) (two-sided):                  0.00   Kurtosis:                        24.12
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [88]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_thu_new = results_thu_new.get_forecast(steps=73)
pred_ci_thu_new = pred_uc_thu_new.conf_int(alpha = 0.1)
pred_ci_thu_new = pred_ci_thu_new.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [89]:
ax = df_thu_new.CPU_Busy[-73:].plot(label='observed', figsize=(10,6))
pred_uc_thu_new.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_thu_new.index,
                pred_ci_thu_new.iloc[:, 0],
                pred_ci_thu_new.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('New CPU Load % Tuesday')

plt.legend()
plt.show()
In [90]:
pred_ci_thu_new['forecast'] = (pred_ci_thu_new['lower_CI'] + pred_ci_thu_new['upper_CI'])/2

pred_ci_thu_new.loc[pred_ci_thu_new['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_thu_new.loc[pred_ci_thu_new['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_thu_new.loc[pred_ci_thu_new['forecast'] > 100, 'forecast'] = 100
final_pred_thu_new = pred_ci_thu_new[1:]
final_pred_thu_new.head()
Out[90]:
lower_CI upper_CI forecast
1900-01-09 00:00:00 49.034517 69.078486 59.056501
1900-01-09 00:10:00 46.798433 71.359216 59.078824
1900-01-09 00:20:00 44.914028 73.288267 59.101147
1900-01-09 00:30:00 43.254027 74.992913 59.123470
1900-01-09 00:40:00 41.753192 76.538395 59.145793
In [91]:
fig = plt.figure(figsize = (12,4))
ax  = fig.add_subplot(111)
ax.plot(final_pred_thu.index, final_pred_thu['forecast'], c='b', label='Base Forecast',linewidth = 3.0)
ax.plot(final_pred_thu_new.index, final_pred_thu_new['forecast'], c='r', label='Expected Shift',linewidth = 2.0)
#ax.plot([0,len(final_pred_thu.index)],[80,80], linewidth=3)

leg = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# get the lines and texts inside legend box
leg_lines = leg.get_lines()
leg_texts = leg.get_texts()
# bulk-set the properties of all lines and texts
plt.setp(leg_lines, linewidth=3)
plt.setp(leg_texts, fontsize='medium')
plt.title('Impact Analysis of New Job', fontsize = 'medium')
plt.show()
#fig.savefig('C:/Users/manoj.e.kumar.sharma/Desktop/Graphs/Impact Graphs/Impact Analysis-thu.png')

Prediction for Friday's

In [92]:
warnings.filterwarnings("ignore") # specify to ignore warning messages
df_fri = df1[df1['Weekday'] == 'Friday']
del df_fri['Weekday']
del df_fri['Index']
df_fri.reset_index(inplace=True)
del df_fri['index']
length = len(df_fri.index)-1
df_fri.drop(df_fri.index[length], inplace=True)
start = datetime.datetime.strptime("00:00:00",'%H:%M:%S')
time_list = [start + relativedelta(minutes=x*10) for x in range(0,length)]
df_fri['index'] = time_list
df_fri.set_index(['index'], inplace=True)
df_fri.index.name=None
In [94]:
#Summary Statistics of series
print(df_fri.describe())
          CPU_Busy
count  1151.000000
mean     44.425187
std      13.116951
min       8.100000
25%      35.320000
50%      43.510000
75%      53.090000
max      89.930000
In [95]:
#Time series plot
ax = df_fri.CPU_Busy.plot(figsize=(10,4), title= 'Hourly CPU Utilization Time Series Plot', fontsize=8)
ax.set(xlabel="Days", ylabel="CPU Utilization Percentage")
Out[95]:
[<matplotlib.text.Text at 0x1f280f1d1d0>,
 <matplotlib.text.Text at 0x1f2c57bd048>]
In [96]:
decomposition = seasonal_decompose(df_fri.values, freq=72)
fig = plt.figure() 
fig = decomposition.plot()
fig.set_size_inches(12, 5)
<matplotlib.figure.Figure at 0x1f28fdcb048>
In [97]:
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_fri.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -3.695331
p-value: 0.004179
Critical Values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
In [98]:
fig = plt.figure(figsize=(8,4))
pyplot.figure(figsize = (8,4))
pyplot.subplot(211)
plot_acf(df_fri.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_fri.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1f28143fc50>
In [99]:
mod = sm.tsa.statespace.SARIMAX(df_fri['CPU_Busy'],
                                order=(3, 1, 3),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_fri = mod.fit()

print(results_fri.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1151
Model:             SARIMAX(3, 1, 3)x(1, 1, 1, 72)   Log Likelihood               -3828.735
Date:                            Thu, 19 Apr 2018   AIC                           7675.469
Time:                                    20:15:59   BIC                           7720.904
Sample:                                01-01-1900   HQIC                          7692.619
                                     - 01-08-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.9230      0.069    -13.472      0.000      -1.057      -0.789
ar.L2         -0.8571      0.047    -18.104      0.000      -0.950      -0.764
ar.L3          0.0537      0.041      1.317      0.188      -0.026       0.134
ma.L1          0.0889      0.063      1.421      0.155      -0.034       0.212
ma.L2         -0.0260      0.045     -0.575      0.565      -0.114       0.063
ma.L3         -0.7758      0.039    -19.691      0.000      -0.853      -0.699
ar.S.L72      -0.3778      0.035    -10.660      0.000      -0.447      -0.308
ma.S.L72      -0.7339      0.033    -22.037      0.000      -0.799      -0.669
sigma2       115.9546      5.262     22.036      0.000     105.641     126.268
===================================================================================
Ljung-Box (Q):                       73.51   Jarque-Bera (JB):                10.37
Prob(Q):                              0.00   Prob(JB):                         0.01
Heteroskedasticity (H):               0.94   Skew:                             0.25
Prob(H) (two-sided):                  0.57   Kurtosis:                         3.08
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [100]:
results_fri.plot_diagnostics(figsize=(10, 7))
plt.show()
In [101]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_fri = results_fri.get_forecast(steps=73)
pred_ci_fri = pred_uc_fri.conf_int(alpha = 0.1)
pred_ci_fri = pred_ci_fri.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-101-7558fbbc2903> in <module>()
      1 #Producing and Visualizing future forecasts (72 intervals in future)
----> 2 pred_uc_fri = results_fri.get_forecast(steps=73)
      3 pred_ci_fri = pred_uc_fri.conf_int(alpha = 0.1)
      4 pred_ci_fri = pred_ci_fri.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})

C:\Users\manoj.e.kumar.sharma\Documents\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\mlemodel.py in get_forecast(self, steps, **kwargs)
   2313         else:
   2314             end = steps
-> 2315         return self.get_prediction(start=self.nobs, end=end, **kwargs)
   2316 
   2317     def predict(self, start=None, end=None, dynamic=False, **kwargs):

C:\Users\manoj.e.kumar.sharma\Documents\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\mlemodel.py in get_prediction(self, start, end, dynamic, **kwargs)
   2278         # case of npredictions = 1
   2279         prediction_results = self.filter_results.predict(
-> 2280             start, end+out_of_sample+1, dynamic, **kwargs
   2281         )
   2282 

C:\Users\manoj.e.kumar.sharma\Documents\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\kalman_filter.py in predict(self, start, end, dynamic, **kwargs)
   1556             model._initialize_state()
   1557 
-> 1558             results = self._predict(nstatic, ndynamic, nforecast, model)
   1559 
   1560         return PredictionResults(results, start, end, nstatic, ndynamic,

C:\Users\manoj.e.kumar.sharma\Documents\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\kalman_filter.py in _predict(self, nstatic, ndynamic, nforecast, model)
   1604         # Return the predicted state and predicted state covariance matrices
   1605         results = FilterResults(model)
-> 1606         results.update_filter(kfilter)
   1607         return results
   1608 

C:\Users\manoj.e.kumar.sharma\Documents\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\kalman_filter.py in update_filter(self, kalman_filter)
   1212         )
   1213         self.predicted_state_cov = np.array(
-> 1214             kalman_filter.predicted_state_cov, copy=True
   1215         )
   1216 

MemoryError: 
In [ ]:
ax = df_fri.CPU_Busy[-73:].plot(label='observed', figsize=(10,4))
pred_uc_fri.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_fri.index,
                pred_ci_fri.iloc[:, 0],
                pred_ci_fri.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('CPU Load % Fridays')

plt.legend()
plt.show()
In [ ]:
pred_ci_fri['forecast'] = (pred_ci_fri['lower_CI'] + pred_ci_fri['upper_CI'])/2

pred_ci_fri.loc[pred_ci_fri['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_fri.loc[pred_ci_fri['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_fri.loc[pred_ci_fri['forecast'] > 100, 'forecast'] = 100
final_pred_fri = pred_ci_fri[1:]
final_pred_fri.head(10)
final_pred_fri.to_csv('C:\\Python Library\\RMG Data\\forcast fri.csv')
In [ ]:
fig = plt.figure(figsize=(12,4))
sns.tsplot([final_pred_fri.upper_CI[1:], final_pred_fri.forecast[1:],
            final_pred_fri.lower_CI[1:]],ci = [0,100], color="indianred")
In [103]:
CPU_pred = 15
start_time = str('02:00:00')
end_time = str('05:00:00')

mod = df_fri.between_time(start_time,end_time) + CPU_pred
df_fri_new = pd.merge(df_fri, mod, left_index=True, right_index=True, how='outer')
df_fri_new = df_fri_new.fillna(0)
df_fri_new['CPU_Busy'] = df_fri_new[['CPU_Busy_x', 'CPU_Busy_y']].max(axis=1)
del df_fri_new['CPU_Busy_x']
del df_fri_new['CPU_Busy_y']
df_fri_new.head()
Out[103]:
CPU_Busy
1900-01-01 00:00:00 34.37
1900-01-01 00:10:00 30.92
1900-01-01 00:20:00 34.06
1900-01-01 00:30:00 38.60
1900-01-01 00:40:00 36.77
In [104]:
                                        ## Augmented Dickey Fuller Test ##
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_fri_new.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -4.367326
p-value: 0.000339
Critical Values:
	10%: -2.568
	1%: -3.436
	5%: -2.864
In [105]:
                            ##  Plotting ACF and PACF  ##
fig = plt.figure(figsize=(10,4))
pyplot.figure(figsize = (10,4))
pyplot.subplot(211)
plot_acf(df_fri_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_fri_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x27f20308a58>
In [106]:
mod = sm.tsa.statespace.SARIMAX(df_fri_new['CPU_Busy'],
                                order=(3, 1, 3),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_fri_new = mod.fit()

print(results_fri_new.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1151
Model:             SARIMAX(3, 1, 3)x(1, 1, 1, 72)   Log Likelihood               -3844.044
Date:                            Thu, 12 Apr 2018   AIC                           7706.089
Time:                                    15:16:24   BIC                           7751.524
Sample:                                01-01-1900   HQIC                          7723.239
                                     - 01-08-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.9062      0.070    -12.997      0.000      -1.043      -0.770
ar.L2         -0.8417      0.047    -17.734      0.000      -0.935      -0.749
ar.L3          0.0694      0.041      1.696      0.090      -0.011       0.150
ma.L1          0.0837      0.064      1.316      0.188      -0.041       0.208
ma.L2         -0.0200      0.044     -0.453      0.651      -0.107       0.067
ma.L3         -0.7752      0.036    -21.307      0.000      -0.847      -0.704
ar.S.L72      -0.4271      0.035    -12.273      0.000      -0.495      -0.359
ma.S.L72      -0.7208      0.033    -21.969      0.000      -0.785      -0.657
sigma2       119.6423      5.418     22.081      0.000     109.022     130.262
===================================================================================
Ljung-Box (Q):                       74.40   Jarque-Bera (JB):                10.98
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               0.92   Skew:                             0.26
Prob(H) (two-sided):                  0.45   Kurtosis:                         3.04
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [107]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_fri_new = results_fri_new.get_forecast(steps=73)
pred_ci_fri_new = pred_uc_fri_new.conf_int(alpha = 0.1)
pred_ci_fri_new = pred_ci_fri_new.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [108]:
ax = df_fri_new.CPU_Busy[-73:].plot(label='observed', figsize=(10,6))
pred_uc_fri_new.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_fri_new.index,
                pred_ci_fri_new.iloc[:, 0],
                pred_ci_fri_new.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('New CPU Load % Tuesday')

plt.legend()
plt.show()
In [109]:
pred_ci_fri_new['forecast'] = (pred_ci_fri_new['lower_CI'] + pred_ci_fri_new['upper_CI'])/2

pred_ci_fri_new.loc[pred_ci_fri_new['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_fri_new.loc[pred_ci_fri_new['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_fri_new.loc[pred_ci_fri_new['forecast'] > 100, 'forecast'] = 100
final_pred_fri_new = pred_ci_fri_new[1:]
final_pred_fri_new.head()
Out[109]:
lower_CI upper_CI forecast
1900-01-09 00:00:00 41.479879 78.026369 59.753124
1900-01-09 00:10:00 29.975320 66.587904 48.281612
1900-01-09 00:20:00 30.167278 67.191964 48.679621
1900-01-09 00:30:00 40.500497 77.738565 59.119531
1900-01-09 00:40:00 33.812904 71.121233 52.467069
In [110]:
fig = plt.figure(figsize = (12,4))
ax  = fig.add_subplot(111)
ax.plot(final_pred_fri.index, final_pred_fri['forecast'], c='b', label='Base Forecast',linewidth = 3.0)
ax.plot(final_pred_fri_new.index, final_pred_fri_new['forecast'], c='r', label='Expected Shift',linewidth = 2.0)
#ax.plot([0,len(final_pred_fri.index)],[80,80], linewidth=3)

leg = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# get the lines and texts inside legend box
leg_lines = leg.get_lines()
leg_texts = leg.get_texts()
# bulk-set the properties of all lines and texts
plt.setp(leg_lines, linewidth=3)
plt.setp(leg_texts, fontsize='medium')
plt.title('Impact Analysis of New Job', fontsize = 'medium')
plt.show()
#fig.savefig('C:/Users/manoj.e.kumar.sharma/Desktop/Graphs/Impact Graphs/Impact Analysis-fri.png')

Prediction for Saturday's

In [9]:
warnings.filterwarnings("ignore") # specify to ignore warning messages
df_sat = df1[df1['Weekday'] == 'Saturday']
del df_sat['Weekday']
del df_sat['Index']
df_sat.reset_index(inplace=True)
del df_sat['index']
length = len(df_sat.index)-1
df_sat.drop(df_sat.index[length], inplace=True)
start = datetime.datetime.strptime("00:00:00",'%H:%M:%S')
time_list = [start + relativedelta(minutes=x*10) for x in range(0,length)]
df_sat['index'] = time_list
df_sat.set_index(['index'], inplace=True)
df_sat.index.name=None
In [12]:
#Summary Statistics of series
print(df_sat.describe())
          CPU_Busy
count  1151.000000
mean     40.396047
std      15.761480
min       0.810000
25%      30.560000
50%      40.600000
75%      51.015000
max      93.750000
In [13]:
#Time series plot
ax = df_sat.CPU_Busy.plot(figsize=(10,4), title= 'Hourly CPU Utilization Time Series Plot', fontsize=8)
ax.set(xlabel="Days", ylabel="CPU Utilization Percentage")
Out[13]:
[<matplotlib.text.Text at 0x1ed746f6b38>,
 <matplotlib.text.Text at 0x1ed747916d8>]
In [14]:
decomposition = seasonal_decompose(df_sat.values, freq=72)
fig = plt.figure() 
fig = decomposition.plot()
fig.set_size_inches(12, 5)
<matplotlib.figure.Figure at 0x1ed7682b278>
In [15]:
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_sat.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -6.112497
p-value: 0.000000
Critical Values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
In [16]:
fig = plt.figure(figsize=(8,4))
pyplot.figure(figsize = (8,4))
pyplot.subplot(211)
plot_acf(df_sat.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_sat.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1ed76962940>
In [68]:
mod = sm.tsa.statespace.SARIMAX(df_sat['CPU_Busy'],
                                order=(3, 1, 3),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_sat = mod.fit()

print(results_sat.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1151
Model:             SARIMAX(3, 1, 3)x(1, 1, 1, 72)   Log Likelihood               -3959.263
Date:                            Fri, 13 Apr 2018   AIC                           7936.526
Time:                                    00:22:17   BIC                           7981.961
Sample:                                01-01-1900   HQIC                          7953.676
                                     - 01-08-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2134      0.129     -1.660      0.097      -0.465       0.039
ar.L2          0.7812      0.077     10.182      0.000       0.631       0.932
ar.L3          0.0385      0.058      0.664      0.506      -0.075       0.152
ma.L1         -0.5495      0.125     -4.404      0.000      -0.794      -0.305
ma.L2         -0.9244      0.025    -37.205      0.000      -0.973      -0.876
ma.L3          0.5008      0.110      4.570      0.000       0.286       0.716
ar.S.L72      -0.4006      0.033    -12.301      0.000      -0.464      -0.337
ma.S.L72      -0.7273      0.035    -21.023      0.000      -0.795      -0.660
sigma2       150.0872      6.897     21.760      0.000     136.569     163.606
===================================================================================
Ljung-Box (Q):                       96.79   Jarque-Bera (JB):                 0.33
Prob(Q):                              0.00   Prob(JB):                         0.85
Heteroskedasticity (H):               0.87   Skew:                            -0.04
Prob(H) (two-sided):                  0.20   Kurtosis:                         3.03
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [69]:
results_sat.plot_diagnostics(figsize=(10, 7))
plt.show()
In [70]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_sat = results_sat.get_forecast(steps=73)
pred_ci_sat = pred_uc_sat.conf_int(alpha = 0.1)
pred_ci_sat = pred_ci_sat.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [71]:
ax = df_sat.CPU_Busy[-73:].plot(label='observed', figsize=(10,4))
pred_uc_sat.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_sat.index,
                pred_ci_sat.iloc[:, 0],
                pred_ci_sat.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('CPU Load % Saturdays')

plt.legend()
plt.show()
In [72]:
pred_ci_sat['forecast'] = (pred_ci_sat['lower_CI'] + pred_ci_sat['upper_CI'])/2

pred_ci_sat.loc[pred_ci_sat['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_sat.loc[pred_ci_sat['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_sat.loc[pred_ci_sat['forecast'] > 100, 'forecast'] = 100
final_pred_sat = pred_ci_sat[1:]
final_pred_sat.head(10)
final_pred_sat.to_csv('C:\\Python Library\\RMG Data\\forcast sat.csv')
In [73]:
fig = plt.figure(figsize=(12,4))
sns.tsplot([final_pred_sat.upper_CI[1:], final_pred_sat.forecast[1:],
            final_pred_sat.lower_CI[1:]],ci = [0,100], color="indianred")
Out[73]:
<matplotlib.axes._subplots.AxesSubplot at 0x1ed44f285c0>
In [32]:
CPU_pred = 15
start_time = str('02:00:00')
end_time = str('05:00:00')

mod = df_sat.between_time(start_time,end_time) + CPU_pred
df_sat_new = pd.merge(df_sat, mod, left_index=True, right_index=True, how='outer')
df_sat_new = df_sat_new.fillna(0)
df_sat_new['CPU_Busy'] = df_sat_new[['CPU_Busy_x', 'CPU_Busy_y']].max(axis=1)
del df_sat_new['CPU_Busy_x']
del df_sat_new['CPU_Busy_y']
df_sat_new.head()
Out[32]:
CPU_Busy
1900-01-01 00:00:00 17.98
1900-01-01 00:10:00 44.26
1900-01-01 00:20:00 51.50
1900-01-01 00:30:00 22.48
1900-01-01 00:40:00 50.66
In [33]:
                                        ## Augmented Dickey Fuller Test ##
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_sat_new.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -5.998525
p-value: 0.000000
Critical Values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
In [34]:
                            ##  Plotting ACF and PACF  ##
fig = plt.figure(figsize=(10,4))
pyplot.figure(figsize = (10,4))
pyplot.subplot(211)
plot_acf(df_sat_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_sat_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1ed00232898>
In [35]:
mod = sm.tsa.statespace.SARIMAX(df_sat_new['CPU_Busy'],
                                order=(3, 1, 3),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_sat_new = mod.fit()

print(results_sat_new.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1151
Model:             SARIMAX(3, 1, 3)x(1, 1, 1, 72)   Log Likelihood               -3966.996
Date:                            Thu, 12 Apr 2018   AIC                           7951.992
Time:                                    19:20:25   BIC                           7997.427
Sample:                                01-01-1900   HQIC                          7969.142
                                     - 01-08-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2060      0.142     -1.447      0.148      -0.485       0.073
ar.L2          0.7736      0.088      8.827      0.000       0.602       0.945
ar.L3          0.0263      0.061      0.430      0.667      -0.094       0.146
ma.L1         -0.5336      0.139     -3.839      0.000      -0.806      -0.261
ma.L2         -0.9194      0.025    -37.173      0.000      -0.968      -0.871
ma.L3          0.4880      0.121      4.039      0.000       0.251       0.725
ar.S.L72      -0.4254      0.032    -13.269      0.000      -0.488      -0.363
ma.S.L72      -0.7065      0.034    -20.495      0.000      -0.774      -0.639
sigma2       153.0900      7.107     21.540      0.000     139.160     167.020
===================================================================================
Ljung-Box (Q):                      103.72   Jarque-Bera (JB):                 0.19
Prob(Q):                              0.00   Prob(JB):                         0.91
Heteroskedasticity (H):               0.86   Skew:                            -0.03
Prob(H) (two-sided):                  0.16   Kurtosis:                         2.99
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [36]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_sat_new = results_sat_new.get_forecast(steps=73)
pred_ci_sat_new = pred_uc_sat_new.conf_int(alpha = 0.1)
pred_ci_sat_new = pred_ci_sat_new.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [37]:
ax = df_sat_new.CPU_Busy[-73:].plot(label='observed', figsize=(10,6))
pred_uc_sat_new.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_sat_new.index,
                pred_ci_sat_new.iloc[:, 0],
                pred_ci_sat_new.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('New CPU Load % Tuesday')

plt.legend()
plt.show()
In [38]:
pred_ci_sat_new['forecast'] = (pred_ci_sat_new['lower_CI'] + pred_ci_sat_new['upper_CI'])/2

pred_ci_sat_new.loc[pred_ci_sat_new['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_sat_new.loc[pred_ci_sat_new['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_sat_new.loc[pred_ci_sat_new['forecast'] > 100, 'forecast'] = 100
final_pred_sat_new = pred_ci_sat_new[1:]
final_pred_sat_new.head()
Out[38]:
lower_CI upper_CI forecast
1900-01-09 00:00:00 8.167296 50.227840 29.197568
1900-01-09 00:10:00 6.752625 50.193319 28.472972
1900-01-09 00:20:00 10.315065 54.570236 32.442650
1900-01-09 00:30:00 8.713045 53.751485 31.232265
1900-01-09 00:40:00 20.351651 65.859960 43.105806
In [39]:
fig = plt.figure(figsize = (12,4))
ax  = fig.add_subplot(111)
ax.plot(final_pred_sat.index, final_pred_sat['forecast'], c='b', label='Base Forecast',linewidth = 3.0)
ax.plot(final_pred_sat_new.index, final_pred_sat_new['forecast'], c='r', label='Expected Shift',linewidth = 2.0)
#ax.plot([0,len(final_pred_sat.index)],[80,80], linewidth=3)

leg = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# get the lines and texts inside legend box
leg_lines = leg.get_lines()
leg_texts = leg.get_texts()
# bulk-set the properties of all lines and texts
plt.setp(leg_lines, linewidth=3)
plt.setp(leg_texts, fontsize='medium')
plt.title('Impact Analysis of New Job', fontsize = 'medium')
plt.show()
#fig.savefig('C:/Users/manoj.e.kumar.sharma/Desktop/Graphs/Impact Graphs/Impact Analysis-sat.png')

Prediction for Sunday's

In [40]:
warnings.filterwarnings("ignore") # specify to ignore warning messages
df_sun = df1[df1['Weekday'] == 'Sunday']
del df_sun['Weekday']
del df_sun['Index']
df_sun.reset_index(inplace=True)
del df_sun['index']
length = len(df_sun.index)-1
df_sun.drop(df_sun.index[length], inplace=True)
start = datetime.datetime.strptime("00:00:00",'%H:%M:%S')
time_list = [start + relativedelta(minutes=x*10) for x in range(0,length)]
df_sun['index'] = time_list
df_sun.set_index(['index'], inplace=True)
df_sun.index.name=None
In [43]:
#Summary Statistics of series
print(df_sun.describe())
          CPU_Busy
count  1007.000000
mean     17.061966
std      18.823170
min       0.410000
25%       3.610000
50%       8.400000
75%      23.845000
max      89.860000
In [44]:
#Time series plot
ax = df_sun.CPU_Busy.plot(figsize=(10,4), title= 'Hourly CPU Utilization Time Series Plot', fontsize=8)
ax.set(xlabel="Days", ylabel="CPU Utilization Percentage")
Out[44]:
[<matplotlib.text.Text at 0x1ed002ab7b8>,
 <matplotlib.text.Text at 0x1ed27ad2b70>]
In [45]:
decomposition = seasonal_decompose(df_sun.values, freq=72)
fig = plt.figure() 
fig = decomposition.plot()
fig.set_size_inches(12, 5)
<matplotlib.figure.Figure at 0x1ed001b7cf8>
In [46]:
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_sun.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -6.465259
p-value: 0.000000
Critical Values:
	1%: -3.437
	5%: -2.864
	10%: -2.568
In [47]:
fig = plt.figure(figsize=(8,4))
pyplot.figure(figsize = (8,4))
pyplot.subplot(211)
plot_acf(df_sun.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_sun.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1ed001dd940>
In [49]:
mod = sm.tsa.statespace.SARIMAX(df_sun['CPU_Busy'],
                                order=(8, 0, 1),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_sun = mod.fit()

print(results_sun.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1007
Model:             SARIMAX(8, 0, 1)x(1, 1, 1, 72)   Log Likelihood               -3032.387
Date:                            Thu, 12 Apr 2018   AIC                           6088.774
Time:                                    23:37:22   BIC                           6147.750
Sample:                                01-01-1900   HQIC                          6111.181
                                     - 01-07-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.3960      0.344      1.152      0.249      -0.278       1.070
ar.L2          0.2077      0.204      1.017      0.309      -0.192       0.608
ar.L3         -0.0533      0.041     -1.297      0.195      -0.134       0.027
ar.L4          0.0003      0.041      0.007      0.995      -0.079       0.080
ar.L5          0.0347      0.035      0.980      0.327      -0.035       0.104
ar.L6          0.0508      0.037      1.359      0.174      -0.022       0.124
ar.L7          0.0038      0.036      0.108      0.914      -0.066       0.074
ar.L8         -0.0830      0.032     -2.595      0.009      -0.146      -0.020
ma.L1          0.2035      0.345      0.590      0.555      -0.473       0.880
ar.S.L72      -0.7191      0.022    -32.861      0.000      -0.762      -0.676
ma.S.L72      -0.3857      0.034    -11.485      0.000      -0.452      -0.320
sigma2        69.6247      1.984     35.099      0.000      65.737      73.513
===================================================================================
Ljung-Box (Q):                       49.70   Jarque-Bera (JB):               972.82
Prob(Q):                              0.14   Prob(JB):                         0.00
Heteroskedasticity (H):               0.84   Skew:                             0.29
Prob(H) (two-sided):                  0.16   Kurtosis:                         8.19
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [50]:
results_sun.plot_diagnostics(figsize=(10, 7))
plt.show()
In [51]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_sun = results_sun.get_forecast(steps=73)
pred_ci_sun = pred_uc_sun.conf_int(alpha = 0.1)
pred_ci_sun = pred_ci_sun.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [52]:
ax = df_sun.CPU_Busy[-73:].plot(label='observed', figsize=(10,4))
pred_uc_sun.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_sun.index,
                pred_ci_sun.iloc[:, 0],
                pred_ci_sun.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('CPU Load % Fridays')

plt.legend()
plt.show()
In [53]:
pred_ci_sun['forecast'] = (pred_ci_sun['lower_CI'] + pred_ci_sun['upper_CI'])/2

pred_ci_sun.loc[pred_ci_sun['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_sun.loc[pred_ci_sun['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_sun.loc[pred_ci_sun['forecast'] > 100, 'forecast'] = 100
final_pred_sun = pred_ci_sun[1:]
final_pred_sun.head(10)
final_pred_sun.to_csv('C:\\Python Library\\RMG Data\\forcast sun.csv')
In [54]:
fig = plt.figure(figsize=(12,4))
sns.tsplot([final_pred_sun.upper_CI[1:], final_pred_sun.forecast[1:],
            final_pred_sun.lower_CI[1:]],ci = [0,100], color="indianred")
Out[54]:
<matplotlib.axes._subplots.AxesSubplot at 0x1ed44f42780>
In [55]:
CPU_pred = 15
start_time = str('02:00:00')
end_time = str('05:00:00')

mod = df_sun.between_time(start_time,end_time) + CPU_pred
df_sun_new = pd.merge(df_sun, mod, left_index=True, right_index=True, how='outer')
df_sun_new = df_sun_new.fillna(0)
df_sun_new['CPU_Busy'] = df_sun_new[['CPU_Busy_x', 'CPU_Busy_y']].max(axis=1)
del df_sun_new['CPU_Busy_x']
del df_sun_new['CPU_Busy_y']
df_sun_new.head()
Out[55]:
CPU_Busy
1900-01-01 00:00:00 1.31
1900-01-01 00:10:00 3.80
1900-01-01 00:20:00 23.96
1900-01-01 00:30:00 18.39
1900-01-01 00:40:00 16.65
In [56]:
                                        ## Augmented Dickey Fuller Test ##
from statsmodels.tsa.stattools import adfuller
result = adfuller(df_sun_new.CPU_Busy)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))
ADF Statistic: -5.402877
p-value: 0.000003
Critical Values:
	1%: -3.437
	5%: -2.864
	10%: -2.568
In [57]:
                            ##  Plotting ACF and PACF  ##
fig = plt.figure(figsize=(10,4))
pyplot.figure(figsize = (10,4))
pyplot.subplot(211)
plot_acf(df_sun_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.subplot(212)
plot_pacf(df_sun_new.CPU_Busy, ax=pyplot.gca(),lags = 40)
pyplot.show()
<matplotlib.figure.Figure at 0x1ed004a0a20>
In [58]:
mod = sm.tsa.statespace.SARIMAX(df_sun_new['CPU_Busy'],
                                order=(3, 1, 3),
                                seasonal_order=(1, 1, 1, 72),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results_sun_new = mod.fit()

print(results_sun_new.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                           CPU_Busy   No. Observations:                 1007
Model:             SARIMAX(3, 1, 3)x(1, 1, 1, 72)   Log Likelihood               -3053.398
Date:                            Fri, 13 Apr 2018   AIC                           6124.796
Time:                                    00:03:35   BIC                           6169.028
Sample:                                01-01-1900   HQIC                          6141.601
                                     - 01-07-1900                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.8682      0.067    -12.992      0.000      -0.999      -0.737
ar.L2          0.0672      0.055      1.212      0.226      -0.041       0.176
ar.L3          0.5552      0.033     16.954      0.000       0.491       0.619
ma.L1          0.4667      0.063      7.380      0.000       0.343       0.591
ma.L2         -0.5990      0.063     -9.575      0.000      -0.722      -0.476
ma.L3         -0.8857      0.052    -17.080      0.000      -0.987      -0.784
ar.S.L72      -0.7646      0.019    -40.746      0.000      -0.801      -0.728
ma.S.L72      -0.3327      0.032    -10.395      0.000      -0.395      -0.270
sigma2        70.1891      2.029     34.597      0.000      66.213      74.165
===================================================================================
Ljung-Box (Q):                       57.01   Jarque-Bera (JB):              1157.91
Prob(Q):                              0.04   Prob(JB):                         0.00
Heteroskedasticity (H):               0.86   Skew:                             0.22
Prob(H) (two-sided):                  0.19   Kurtosis:                         8.67
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [59]:
#Producing and Visualizing future forecasts (72 intervals in future)
pred_uc_sun_new = results_sun_new.get_forecast(steps=73)
pred_ci_sun_new = pred_uc_sun_new.conf_int(alpha = 0.1)
pred_ci_sun_new = pred_ci_sun_new.rename(columns = {'lower CPU_Busy':'lower_CI','upper CPU_Busy':'upper_CI'})
In [60]:
ax = df_sun_new.CPU_Busy[-73:].plot(label='observed', figsize=(10,6))
pred_uc_sun_new.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci_sun_new.index,
                pred_ci_sun_new.iloc[:, 0],
                pred_ci_sun_new.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Day')
ax.set_ylabel('New CPU Load % Tuesday')

plt.legend()
plt.show()
In [61]:
pred_ci_sun_new['forecast'] = (pred_ci_sun_new['lower_CI'] + pred_ci_sun_new['upper_CI'])/2

pred_ci_sun_new.loc[pred_ci_sun_new['lower_CI'] < 0, 'lower_CI'] = 0
pred_ci_sun_new.loc[pred_ci_sun_new['upper_CI'] > 100, 'upper_CI'] = 100
pred_ci_sun_new.loc[pred_ci_sun_new['forecast'] > 100, 'forecast'] = 100
final_pred_sun_new = pred_ci_sun_new[1:]
final_pred_sun_new.head()
Out[61]:
lower_CI upper_CI forecast
1900-01-08 00:00:00 19.126314 51.572668 35.349491
1900-01-08 00:10:00 4.915413 39.504075 22.209744
1900-01-08 00:20:00 9.425927 44.640908 27.033418
1900-01-08 00:30:00 2.648660 38.207709 20.428184
1900-01-08 00:40:00 1.927139 37.640005 19.783572
In [62]:
fig = plt.figure(figsize = (12,4))
ax  = fig.add_subplot(111)
ax.plot(final_pred_sun.index, final_pred_sun['forecast'], c='b', label='Base Forecast',linewidth = 3.0)
ax.plot(final_pred_sun_new.index, final_pred_sun_new['forecast'], c='r', label='Expected Shift',linewidth = 2.0)
#ax.plot([0,len(final_pred_sun.index)],[80,80], linewidth=3)

leg = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# get the lines and texts inside legend box
leg_lines = leg.get_lines()
leg_texts = leg.get_texts()
# bulk-set the properties of all lines and texts
plt.setp(leg_lines, linewidth=3)
plt.setp(leg_texts, fontsize='medium')
plt.title('Impact Analysis of New Job', fontsize = 'medium')
plt.show()
#fig.savefig('C:/Users/manoj.e.kumar.sharma/Desktop/Graphs/Impact Graphs/Impact Analysis-sun.png')